library("testthat")




# Build the square source-by-destination matrix of `move5` flows between the
# CBSAs listed in `included.cbsas`.
#
# Args:
#   migration.cbsa: data frame with columns `source.cbsa`, `dest.cbsa` and
#     `move5`, one row per (source, destination) pair.
#   included.cbsas: vector of CBSA ids; a flow is kept only when both its
#     source and its destination are in this vector.
#
# Returns:
#   A data frame with one row per source CBSA and one column per destination
#   CBSA, in the same order, holding `move5` (0 where no flow was recorded).
#   Every remaining row has a non-zero row sum.
#
# Raises:
#   An error when the sources and destinations left after filtering are not
#   the same CBSAs in the same order; the matrix could not be labelled
#   correctly in that case.
create_flows_matrix <- function(migration.cbsa, included.cbsas){
  # Flows from a CBSA to itself are zeroed before the matrix is built.
  # `fill = 0` covers both pairs that are absent from the data and pairs whose
  # `move5` is NA.
  Flows.all <- migration.cbsa %>%
    mutate(move5 = replace(move5, source.cbsa == dest.cbsa, 0)) %>%
    filter(source.cbsa %in% included.cbsas & dest.cbsa %in% included.cbsas) %>%
    select(source.cbsa, dest.cbsa, move5) %>%
    spread(dest.cbsa, move5, fill = 0)

  # Keep only the destination columns. `drop = FALSE` keeps a data frame even
  # when a single destination is left.
  is.label.column <- colnames(Flows.all) == "source.cbsa"
  Flows <- Flows.all[, !is.label.column, drop = FALSE]

  # Rows are labelled with the column names below, which is only correct when
  # row i and column i refer to the same CBSA.
  source.labels <- as.character(Flows.all$source.cbsa)
  if (!identical(source.labels, colnames(Flows))) {
    stop("create_flows_matrix: source and destination CBSAs differ after ",
         "filtering, so a square flow matrix cannot be built",
         call. = FALSE)
  }
  row.names(Flows) <- colnames(Flows)

  # Remove CBSAs without any outflow. Deleting a CBSA's column can leave
  # another row with no outflow, so repeat until nothing is left to remove.
  repeat {
    empty.cbsas <- which(rowSums(Flows) == 0)
    if (length(empty.cbsas) == 0) {
      break
    }
    Flows <- Flows[-empty.cbsas, -empty.cbsas, drop = FALSE]
  }

  return (Flows)
}


# Eigen-decomposition of the row-normalised, transposed flow matrix.
#
# Computes D^-1 %*% t(Flows), where D is the diagonal matrix of the row sums
# of `Flows`, and returns `eigen()` of that product.
#
# Args:
#   Flows: square numeric matrix or data frame of flows; every row sum must be
#     non-zero.
#
# Returns:
#   The list returned by `eigen()` (components `values` and `vectors`).
#
# Side effects:
#   Prints the first eigenvalue.
#
# Raises:
#   An error when a row sum is zero, because D could not be inverted.
calculate_migration_eigenvalues <- function(Flows) {
  exits <- rowSums(Flows)
  if (any(exits == 0, na.rm = TRUE)) {
    stop("calculate_migration_eigenvalues: Flows has rows that sum to zero",
         call. = FALSE)
  }

  # `nrow` is given explicitly: with a single row sum, diag(x) would build an
  # x-by-x identity matrix instead of a 1x1 matrix holding x.
  AllexitsAdjusteddiag <- diag(exits, nrow = length(exits))

  adjusted.inverse <- solve(AllexitsAdjusteddiag)
  flows.t <- t(as.matrix(Flows))

  MatrixforEigCalc <- adjusted.inverse %*% flows.t

  eig <- eigen(MatrixforEigCalc)

  # Emit one message; wrapping the parts in c() printed them as two strings.
  print(paste0("Eigenvalue is ", eig$values[1]))
  return (eig)
}



# Turn the first eigenvector of the migration matrix into a ranked table of
# city utilities.
#
# Args:
#   eig: list with a `vectors` matrix, as returned by `eigen()`; only the first
#     column is used.
#   Flows: data frame whose column names are the CBSA ids, in the same order as
#     the entries of the eigenvector.
#
# Returns:
#   A tibble sorted by decreasing utility with columns `log.city.util`,
#   `cbsa` (integer), `city.util` and `city.rank` (1 = highest utility).
#   Row names are the CBSA ids of the sorted rows.
get_city_utils_from_eigenvalues <- function(eig, Flows) {

  DomEig <- abs(Re(eig$vectors[, 1]))
  cbsa.labels <- names(Flows)

  ### recover city utilities
  ## UnadjustedforBirthsUtils is the relative utility of each city, where
  ## **higher is better**
  UnadjustedforBirthsUtils <- log(DomEig)
  names(UnadjustedforBirthsUtils) <- cbsa.labels

  # Sort while `cbsa` still holds the original labels, so that the labels of
  # the sorted rows can be reused as row names below.
  city.utils <- tibble::tibble(log.city.util = UnadjustedforBirthsUtils,
                               cbsa = cbsa.labels) %>%
    arrange(desc(log.city.util))
  sorted.labels <- city.utils$cbsa

  city.utils <- city.utils %>%
    mutate(
      cbsa = as.integer(cbsa),
      city.util = exp(log.city.util),
      city.rank = dplyr::row_number()
    )

  # Row names must follow the sorted order; assigning names(Flows) directly
  # would attach each label to the wrong city once the rows are sorted.
  row.names(city.utils) <- sorted.labels

  return(city.utils)
}



# Estimate a utility and a rank for every CBSA from the migration flows.
#
# Args:
#   migration.cbsa: data frame of flows with columns `source.cbsa`,
#     `dest.cbsa` and `move5`.
#   filter: not used inside this function. It shares its name with dplyr's
#     filter(); the calls below still reach the dplyr function because R skips
#     non-function objects when looking up a function being called.
#   cutoff_joint: minimum of moves.in + moves.out for a CBSA to be considered.
#
# Returns:
#   The table from get_cbsa_names() joined with `log.city.util`, `city.util`
#   and `city.rank`, restricted to rows with a finite, non-missing `city.util`.
estimate_city_utility <- function(migration.cbsa, filter="count", cutoff_joint = 4){

  moves.all <- get_moves_in_out_from_migration_cbsa(migration.cbsa) %>%
    filter(cbsa %in% get_cbsa_pop()$cbsa)

  # CBSAs with outflow but no inflow at all; they are appended at the bottom of
  # the ranking further down. Ids starting with "99" are excluded.
  moves.worst <- moves.all %>%
    filter(moves.out >= 1 & moves.in == 0 &
             (moves.in + moves.out) >= cutoff_joint & !grepl("^99", cbsa))

  # CBSAs that enter the eigenvector estimation.
  moves <- moves.all %>%
    filter(moves.in >= 1 & moves.out >= .5 &
             (moves.in + moves.out) >= cutoff_joint & !grepl("^99", cbsa))

  # Keep the order in which the ids appear as destinations.
  uniquedest <- unique(migration.cbsa$dest.cbsa)
  included.cbsas <- uniquedest[uniquedest %in% moves$cbsa]

  Flows <- create_flows_matrix(migration.cbsa, included.cbsas)

  # A CBSA with no outflow makes the matrix singular. Removing one CBSA's row
  # and column can empty another row, so prune until none is left.
  repeat {
    drop.also <- which(rowSums(Flows) == 0)
    if (length(drop.also) == 0) {
      break
    }
    print(drop.also)
    Flows <- Flows[-drop.also, -drop.also, drop = FALSE]
  }

  eig <- calculate_migration_eigenvalues(Flows)
  city.utils <- get_city_utils_from_eigenvalues(eig, Flows)

  # Replace infinite log utilities by the smallest finite one.
  # NOTE(review): `city.util` is not recomputed for those rows, so it no
  # longer equals exp(log.city.util) there -- confirm this is intended.
  finite.log.utils <- city.utils$log.city.util[!is.infinite(city.utils$log.city.util)]
  min.city.log.util <- min(finite.log.utils)
  city.utils$log.city.util[is.infinite(city.utils$log.city.util)] <- min.city.log.util

  if (nrow(moves.worst) > 0) {
    # Rank the inflow-less CBSAs after every estimated one and give them the
    # lowest utility seen so far.
    moves.worst <- moves.worst %>%
      mutate(city.rank = seq_len(nrow(moves.worst)) + max(city.utils$city.rank),
             log.city.util = min.city.log.util,
             city.util = min(city.utils$city.util)
      ) %>%
      select(log.city.util, cbsa, city.util, city.rank)

    city.utils <- rbind(city.utils, moves.worst)
  }

  city.names <- get_cbsa_names()

  city.utils <- city.utils %>%
    right_join(city.names, by = c("cbsa")) %>%
    filter(!is.na(city.util) & !is.infinite(city.util))

  return(city.utils)

}




# Assemble one row per city from the utilities, population, venture-capital
# counts, startup births, migration totals and the other destination-level
# columns of the external data.
#
# Args:
#   city.utils: data frame with a `cbsa` column, e.g. the result of
#     estimate_city_utility().
#   migration.data: optional data frame of flows with columns `source.cbsa`,
#     `dest.cbsa` and `move5`, used for the moves.in / moves.out totals. When
#     NULL (the default) the variable `migration.cbsa` visible from this
#     function's defining environment is used, which is what the function
#     relied on before this argument existed.
#
# Returns:
#   `city.utils` joined with `pop2010`,
#   `venture_capital.num_startups_financed`, `startup.births`, `moves.out`,
#   `moves.in`, `moves.ratio`, the destination columns of the external data,
#   and `inv.startup.births`.
make_cities_dataset <- function(city.utils, migration.data = NULL) {

  migration.by.firms <- bg.fx.load_migration_data_by_firm(only_if_missing = TRUE)

  migration.cbsa.external <- bg.fx.load_migration_data_by_cbsa_with_external() %>%
    filter(year == 2010)

  # Every column whose name contains "dest", with the "dest." part removed
  # from the column names.
  all.external <- migration.cbsa.external %>%
    select(contains("dest")) %>%
    distinct()

  names(all.external) <- gsub("dest\\.", "", names(all.external))

  cities.pop2010 <- migration.cbsa.external %>%
    select(source.cbsa, pop2010) %>%
    unique() %>%
    rename("cbsa" = "source.cbsa")

  cities.vc <- migration.cbsa.external %>%
    select(source.cbsa, venture_capital.num_startups_financed) %>%
    unique() %>%
    rename("cbsa" = "source.cbsa")

  # Number of firm rows with move10 == 0 in each CBSA.
  cities.births <- migration.by.firms %>%
    filter(move10 == 0) %>%
    group_by(cbsa) %>%
    tally() %>%
    rename(startup.births = n)

  # Fall back to the variable this function used to read implicitly.
  if (is.null(migration.data)) {
    migration.data <- migration.cbsa
  }

  # Totals leaving each CBSA (grouped by source) and arriving in each CBSA
  # (grouped by destination).
  outflows <- migration.data %>%
    group_by(source.cbsa) %>%
    summarise(moves.out = sum(move5)) %>%
    rename("cbsa" = "source.cbsa")

  inflows <- migration.data %>%
    group_by(dest.cbsa) %>%
    summarise(moves.in = sum(move5)) %>%
    rename("cbsa" = "dest.cbsa")

  moves.net <- full_join(outflows, inflows, by = c("cbsa")) %>%
    mutate(moves.ratio = moves.out / moves.in)

  # The last two joins have no `by` and therefore match on every column name
  # the two tables share.
  cities.dataset <- city.utils %>%
    inner_join(cities.pop2010, by = c("cbsa")) %>%
    inner_join(cities.vc, by = c("cbsa")) %>%
    inner_join(cities.births, by = c("cbsa")) %>%
    left_join(moves.net) %>%
    left_join(all.external)

  cities.dataset <- cities.dataset %>%
    mutate(inv.startup.births = -1 * log(startup.births + 1))

  return(cities.dataset)
}





# Run estimate_city_utility() twice -- once on `move5` as given and once with
# `move5` replaced by the `move5recpi` column -- and return the first result
# with the second run's utility and rank attached as `city.util.q` and
# `city.rank.q`.
#
# Args:
#   migration.cbsa: data frame of flows containing `move5` and `move5recpi`.
#
# Returns:
#   The table from the first run, left-joined on `cbsa` with the two columns
#   from the second run.
estimate_obs_and_recpi_utils <- function(migration.cbsa) {
  utils.count <- estimate_city_utility(migration.cbsa, filter = "count")

  reweighted.migration <- mutate(migration.cbsa, move5 = move5recpi)
  utils.recpi <- estimate_city_utility(reweighted.migration, filter = "recpi")

  # Only the utility and rank of the second run are carried over.
  recpi.columns <- utils.recpi %>%
    select(cbsa, city.util, city.rank) %>%
    rename(city.util.q = city.util, city.rank.q = city.rank)

  left_join(utils.count, recpi.columns, by = "cbsa")
}







# Recompute every rank column of a cities table.
#
# Calls rank_utils() for the utility ranks, then adds `eship.rank` (order of
# decreasing `city.DEobs`) and `eship_by_pop.rank` (order of decreasing
# `city.DEobs / pop2010`).
#
# Args:
#   x: data frame with the columns rank_utils() needs plus `city.DEobs` and
#     `pop2010`.
#   also_llc: passed on to rank_utils().
#
# Returns:
#   `x` with the rank columns added, sorted by decreasing
#   `city.DEobs / pop2010`.
rerank <- function(x, also_llc=FALSE) {
  with.util.ranks <- rank_utils(x, also_llc)

  by.eship <- arrange(with.util.ranks, desc(city.DEobs))
  by.eship <- mutate(by.eship, eship.rank = dplyr::row_number())

  by.eship.per.pop <- arrange(by.eship, desc(city.DEobs / pop2010))
  mutate(by.eship.per.pop, eship_by_pop.rank = dplyr::row_number())
}

# Recompute the utility rank columns of a cities table.
#
# Args:
#   x: data frame with columns `city.util` and `city.util.q` (and
#     `city.util.llc` when `also_llc` is TRUE).
#   also_llc: when TRUE, `city.rank.llc` is added as well.
#
# Returns:
#   `x` with `city.rank` and `city.rank.q` (and optionally `city.rank.llc`)
#   set to the row position under the matching decreasing sort. Rows end up in
#   the order of the last sort applied.
rank_utils <- function(x, also_llc = FALSE) {
  # Rank on the plain utility first ...
  ranked <- arrange(x, desc(city.util))
  ranked <- mutate(ranked, city.rank = dplyr::row_number())

  # ... then on the `.q` utility.
  ranked <- arrange(ranked, desc(city.util.q))
  ranked <- mutate(ranked, city.rank.q = dplyr::row_number())

  if (also_llc) {
    ranked <- arrange(ranked, desc(city.util.llc))
    ranked <- mutate(ranked, city.rank.llc = dplyr::row_number())
  }

  ranked
}
